function [beta,b,r_history,obj_history]=update_beta(train_data,test_data,Y,beta_last,b_last,lambda1,IFB)
% UPDATE_BETA  Refine the per-task weights and intercepts with ADMM.
%
% Inputs
%   train_data, test_data : structs with fields .data, .taskno (and .label
%                           for train_data); they are stacked row-wise.
%   Y          : labels appended after train_data.label for the test rows.
%   beta_last  : starting weight matrix, one column per task.
%   b_last     : starting intercepts, indexed by task.
%   lambda1    : regularisation weight forwarded to update_phi / obj_value.
%   IFB        : cell array; IFB{t} indexes the columns of the data (and the
%                rows of beta) used for task t.
%
% Outputs
%   beta, b      : updated weights and intercepts.
%   r_history    : norm of the stacked primal residual at every iteration.
%   obj_history  : value returned by obj_value at every iteration.

% --- solver constants --------------------------------------------------
rho       = 1;
max_iter  = 100;
tolerance = 10e-3;
max_count = 50;      % stop after this many iterations without a new best r

% --- starting point ----------------------------------------------------
beta     = beta_last;
b        = b_last;
task_num = size(beta_last,2);

% --- stack training and test samples into one data set -----------------
merged.data   = [train_data.data;   test_data.data];
merged.label  = [train_data.label;  Y];
merged.taskno = [train_data.taskno; test_data.taskno];
n_samples     = length(merged.label);

% --- ADMM state ----------------------------------------------------------
Z   = zeros(n_samples,1);
u   = zeros(n_samples,1);       % scaled dual for the constraint on S
phi = beta(:,2:task_num);       % copy of the columns 2..task_num of beta
v   = zeros(size(phi));         % scaled dual for beta(:,2:end) == phi

best_r = 9999;
stall  = 0;

for iter = 1:max_iter
    % S-update: evaluate the current predictions (shifted by -u) per task
    for t = 1:task_num
        rows = (merged.taskno == t);
        cols = IFB{t};
        Z(rows) = merged.data(rows,cols)*beta(cols,t) + b(t) - u(rows);
    end
    S = update_S(merged.label, rho, Z);

    % (beta,phi,b)-update for the first task_num-1 tasks
    [beta(:,1:task_num-1), phi, b] = update_phi(lambda1, S, merged.data, rho, u, v, ...
        merged.taskno, task_num, phi, beta, b, IFB);

    % beta-update for the last task
    [beta(:,task_num), b(task_num)] = update_last_beta(S, merged.data, u, ...
        v(:,task_num-1), merged.taskno, task_num, phi(:,task_num-1));

    % primal residuals; rows that belong to no task keep the value Z+u
    Z = Z + u;
    fitted = Z;
    for t = 1:task_num
        rows = (merged.taskno == t);
        fitted(rows) = merged.data(rows,:)*beta(:,t) + b(t);
    end
    r1 = S - fitted;
    r2 = beta(:,2:end) - phi;

    % dual variables u and v update
    u = u + r1;
    v = v + r2;

    % stack r1 with the IFB{t} rows of each column of r2, then take the norm
    parts = cell(task_num,1);
    parts{1} = r1;
    for t = 1:task_num-1
        parts{t+1} = r2(IFB{t},t);
    end
    r = norm(vertcat(parts{:}));

    r_history(iter)   = r;
    obj_history(iter) = obj_value(beta, b, lambda1, merged, IFB);

    % track the smallest residual seen so far
    if r < best_r
        best_r = r;
        stall  = 0;
    else
        stall = stall + 1;
    end

    % termination checks
    if (r <= tolerance) || stall == max_count
        break;
    end
end
end

function S=update_S(label,rho,Z)
% UPDATE_S  Exact proximal step for the averaged hinge loss.
%
% Solves, element-wise,
%     min_s  (1/l)*max(0, 1 - label*s) + (rho/2)*(s - Z)^2 ,   l = length(label)
%
% Inputs
%   label : column vector of labels (one per sample).
%   rho   : ADMM penalty parameter.
%   Z     : column vector, the point at which the prox is evaluated.
% Output
%   S     : minimiser, same size as Z.
%
% The minimiser moves Z along +label by at most 1/(rho*l), but never past
% the kink of the hinge at label*s == 1. A fixed step of 1/(rho*l) whenever
% the margin is violated overshoots that kink for samples with
% 1 - 1/(rho*l) < label*Z < 1, so the step is clipped here.
max_step = 1/(rho*length(label));
% Distance (in units of label) from Z to the kink; dividing by label.^2
% keeps the formula valid for labels that are not exactly +/-1. A zero
% label gives Inf here, which is clipped to max_step and then multiplied by
% the zero label, leaving S == Z for that sample.
to_kink = max(1-label.*Z,0)./(label.^2);
S = Z + label.*min(max_step,to_kink);
end

function [beta,phi,b] = update_phi(lambda1,S,data,rho,u,v,taskno,task_num,phi,beta,b,IFB)
% UPDATE_PHI  Refit beta(:,t), phi(:,t) and b(t) for t = 1..task_num-1.
%
% For each t a two-block problem is handed to least_L21:
%   block 1: rows of task t, columns IFB{t+1}, target S + u on those rows;
%   block 2: identity design, target beta(IFB{t+1},t+1) + v(IFB{t+1},t).
% The two columns of the returned W are written back to beta(:,t) and
% phi(:,t) on the rows IFB{t+1}; the returned intercept replaces b(t).
%
% The returned beta has its column task_num removed, so the caller receives
% only the columns this function is responsible for.

% Solver options (passed unchanged to least_L21)
opts.init    = 0;      % guess start point from data.
opts.tFlag   = 1;      % terminate after relative objective value does not change much.
opts.tol     = 10^-3;  % tolerance.
opts.maxIter = 1000;   % maximum iteration number of optimization.
opts.rho_L2  = 0;

for t = 1:task_num-1
    rows  = (taskno == t);
    feats = IFB{t+1};

    % designs and targets of the two blocks
    X = {data(rows,feats), eye(length(feats))};
    Y = {S(rows) + u(rows), beta(feats,t+1) + v(feats,t)};

    % starting values built from the current iterate
    opts.C0 = b(t);
    opts.W0 = [beta(feats,t), phi(feats,t)];

    [W, b(t), ~] = least_L21(X, Y, lambda1/rho, opts);

    beta(feats,t) = W(:,1);
    phi(feats,t)  = W(:,2);
end

% drop the last task's column; it is updated elsewhere
beta(:,task_num) = [];
end

function [beta,b]=update_last_beta(S,data,u,v,taskno,task_num,phi)
% UPDATE_LAST_BETA  Closed-form update of the last task's weights/intercept.
%
% Restricts the samples to those with taskno == task_num, prepends a column
% of ones for the intercept, and solves
%     (P + X'*X) * w = [0;phi] - [0;v] + X'*(S + u)
% where P is the identity with its (1,1) entry zeroed so the intercept gets
% no quadratic penalty.
%
% Outputs
%   b    : first entry of the solution (intercept).
%   beta : remaining entries (weights, one per column of data).

rows = (taskno == task_num);

% design matrix with a leading intercept column
X = data(rows,:);
X = [ones(size(X,1),1), X];
n_coef = size(X,2);

% identity penalty on the weights only
P = eye(n_coef);
P(1,1) = 0;

% right-hand side: pad phi and v with a zero for the intercept slot
target = S(rows) + u(rows);
rhs = [0;phi] - [0;v] + X'*target;

w = (P + X'*X) \ rhs;

b = w(1);
beta = w(2:end);
end
function obj=obj_value(beta,b,lambda1,union,IFB)
% OBJ_VALUE  Objective = averaged hinge loss + lambda1 * row-wise L2 penalty.
%
% Loss term: for every task t, the hinge loss of the samples with
% taskno == t, predicted from columns IFB{t} of the data, divided by the
% total number of labels.
% Penalty term: for every task t >= 2 and every feature f listed in IFB{t},
% lambda1 times the 2-norm of the pair [beta(f,t-1), beta(f,t)].
%
% `union` is a struct with fields .data, .label and .taskno.

task_num = length(b);
inv_n = 1/length(union.label);
obj = 0;

% hinge loss, accumulated task by task
for t = 1:task_num
    rows = (union.taskno == t);
    cols = IFB{t};
    score = union.data(rows,cols)*beta(cols,t) + b(t);
    margin_gap = 1 - union.label(rows).*score;
    obj = obj + inv_n*sum(max(0,margin_gap));
end

% coupling penalty between consecutive tasks
for t = 2:task_num
    feats = IFB{t};
    for k = 1:length(feats)
        pair = beta(feats(k),[t-1, t]);
        obj = obj + lambda1*norm(pair,2);
    end
end
end